This library has been created to enable users to view examples of data visualisations utilising the NHS themes, using NHS public data sets. Users can load the required packages and create the sample data sets, and then choose which data visualisations they would like to run in their own environment.
The foundations of this library are sourced from a GGplot guide by Mike Perham. This has been expanded as a proof of concept for working collaboratively over regions to input and build upon, becoming the R Data Viz Library.
For more information about each data visualisation type, the below are recommended to review:
if (!require("pacman")) install.packages("pacman"); library(pacman)
pacman::p_load(Rcpp, tidyverse,dplyr,tidyr,
ggplot2,ggthemes,ggtext,scales,
png,ggalt,NHSRdatasets,onsr,shinycssloaders, plotly, networkD3, FunnelPlotR, NHSRplotthedots,remotes,devtools,DT)
# Install some packages directly from GitHub
remotes::install_github("rOpenSci/fingertipsR",
build_vignettes = TRUE,
dependencies = "suggests",
build = F)
devtools::install_github("ricardo-bion/ggradar")
library(ggradar)
All of the examples in this document use A&E dummy data from the NHSRdatasets package for NHS reporting, fingertips data for Public Health and ONS data for population data. These give us broad datasets that can be used for different data visualisation types.
More information on these packages can be found here:
https://github.com/nhs-r-community/NHSRdatasets
https://nhs-r-community.github.io/NHSRdatasets/
#Load initial dataset and clean up
Attends_1 <- NHSRdatasets::ae_attendances
Attends_1$org_code <- as.character(Attends_1$org_code)
Attends <- Attends_1 %>%
filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
org_code == "RTK"|org_code == "RA2") %>%
filter(type ==1) %>%
select(-c(3,5))
https://github.com/ropensci/fingertipsR
The below gets population by sex.
# Load Fingertips R
library(fingertipsR)
# Get available profiles in fingertips
profiles_data <- profiles()
print(profiles_data)
## # A tibble: 246 × 4
## ProfileID ProfileName DomainID DomainName
## <int> <chr> <int> <chr>
## 1 18 Smoking Profile 1938132885 Key indicators
## 2 18 Smoking Profile 1938132886 Smoking prevalence in …
## 3 18 Smoking Profile 1938132900 Smoking prevalence in …
## 4 18 Smoking Profile 1938132887 Smoking related mortal…
## 5 18 Smoking Profile 1938132888 Smoking related ill he…
## 6 18 Smoking Profile 1938132889 Impact of smoking
## 7 18 Smoking Profile 1938132890 Smoking quitters
## 8 19 Public Health Outcomes Framework 1000049 A. Overarching indicat…
## 9 19 Public Health Outcomes Framework 1000041 B. Wider determinants …
## 10 19 Public Health Outcomes Framework 1000042 C. Health improvement
## # ℹ 236 more rows
# Find profiles related to population
population_profiles <- profiles_data %>%
filter(grepl("population", ProfileName, ignore.case = TRUE))
print(population_profiles)
## # A tibble: 1 × 4
## ProfileID ProfileName DomainID DomainName
## <int> <chr> <int> <chr>
## 1 132 Populations 1938133081 Populations
# Search for indicators related to population structure, DomainID was found by viewing the population_profiles and selecting a DomainID
population_indicators <- indicator_metadata(DomainID = "1938133081") #Replace with DomainID from above
# Get the relevant indicator of the measure from your list of population_indicators
indicator_id <- 92708 # Replace with the actual indicator ID from the results above
#Get list of area types
area_types_data <- area_types()
# Get the data for the specific indicator
population_data <- fingertips_data(IndicatorID = indicator_id,AreaTypeID = "15") #Note for Area Types you can select all, but it will take a long time. 15 is England.
# Filter data to include only relevant columns and non-NA values
population_data_filtered <- population_data %>%
filter(!is.na(Age), !is.na(Value)) %>%
filter(Timeperiod == "2022") %>% # Alistair's addtion
select(AreaName, Sex, Age, Value)
# Adjust the values for plotting (male values negative for pyramid structure)
population_data_filtered <- population_data_filtered %>%
mutate(Value = ifelse(Sex == "Male", -Value, Value)) %>%
filter(Age != "All ages") %>%
filter(Sex != "Persons")
# Convert the Age column to a factor and specify the levels in the desired order
population_data_filtered$Age <- factor(population_data_filtered$Age, levels = c("0-4 yrs", "5-9 yrs", "10-14 yrs", "15-19 yrs", "20-24 yrs", "25-29 yrs", "30-34 yrs", "35-39 yrs", "40-44 yrs", "45-49 yrs", "50-54 yrs", "55-59 yrs", "60-64 yrs", "65-69 yrs", "70-74 yrs", "75-79 yrs", "80-84 yrs", "85-89 yrs","90+ yrs"))
https://medium.com/@VickyCrockett1/how-do-you-get-data-into-r-from-the-ons-c860043fef8c
The next step loads the data and performs some simple filtering steps.
All of the examples in this document use dummy data from the NHSRdatasets package (more information on this package can be found here: https://github.com/nhs-r-community/NHSRtheme). As the package is not in CRAN, you need to use devtools to load the package from github.
## cli (3.6.2 -> 3.6.3 ) [CRAN]
## digest (0.6.35 -> 0.6.37) [CRAN]
## rlang (1.1.3 -> 1.1.4 ) [CRAN]
## yaml (2.3.8 -> 2.3.10) [CRAN]
## xfun (0.44 -> 0.47 ) [CRAN]
## tinytex (0.51 -> 0.52 ) [CRAN]
## knitr (1.47 -> 1.48 ) [CRAN]
## bslib (0.7.0 -> 0.8.0 ) [CRAN]
## Rcpp (1.0.12 -> 1.0.13) [CRAN]
## colorspace (2.1-0 -> 2.1-1 ) [CRAN]
## rmarkdown (2.27 -> 2.28 ) [CRAN]
## servr (NA -> 0.31 ) [CRAN]
## xaringan (NA -> 0.30 ) [CRAN]
## package 'cli' successfully unpacked and MD5 sums checked
## package 'digest' successfully unpacked and MD5 sums checked
## package 'rlang' successfully unpacked and MD5 sums checked
## package 'yaml' successfully unpacked and MD5 sums checked
## package 'xfun' successfully unpacked and MD5 sums checked
## package 'tinytex' successfully unpacked and MD5 sums checked
## package 'knitr' successfully unpacked and MD5 sums checked
## package 'bslib' successfully unpacked and MD5 sums checked
## package 'colorspace' successfully unpacked and MD5 sums checked
## package 'rmarkdown' successfully unpacked and MD5 sums checked
## package 'servr' successfully unpacked and MD5 sums checked
## package 'xaringan' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## D:\TEMP\RtmpecgK96\downloaded_packages
## ── R CMD build ─────────────────────────────────────────────────────────────────
## checking for file 'D:\TEMP\RtmpecgK96\remotes4708284776f5\nhs-r-community-NHSRtheme-4829355/DESCRIPTION' ... ✔ checking for file 'D:\TEMP\RtmpecgK96\remotes4708284776f5\nhs-r-community-NHSRtheme-4829355/DESCRIPTION' (364ms)
## ─ preparing 'NHSRtheme': (784ms)
## checking DESCRIPTION meta-information ... checking DESCRIPTION meta-information ... ✔ checking DESCRIPTION meta-information
## ─ checking for LF line-endings in source and make files and shell scripts (427ms)
## ─ checking for empty or unneeded directories
## Omitted 'LazyData' from DESCRIPTION
## ─ building 'NHSRtheme_0.1.0.tar.gz'
##
##
## DarkBlue Blue BrightBlue LightBlue AquaBlue Black DarkGrey
## "#003087" "#005EB8" "#0072CE" "#41B6E6" "#00A9CE" "#231f20" "#425563"
## MidGrey PaleGrey DarkGreen Green LightGreen AquaGreen Purple
## "#768692" "#E8EDEE" "#006747" "#009639" "#78BE20" "#00A499" "#330072"
## DarkPink Pink DarkRed Red Orange WarmYellow Yellow
## "#7C2855" "#AE2573" "#8A1538" "#DA291C" "#ED8B00" "#FFB81C" "#FAE100"
# Filter initial dataset
line_df <- Attends %>%
filter(org_code=="RXQ")
# Make plot
ggplot(line_df, aes(x = period, y = attendances)) +
geom_line(colour = "#005EB8", size = 1.5) +
scale_y_continuous(labels = comma) +
labs(title="Type 1 attendances - Bucks Healthcare",
subtitle = "April 2016 to March 2019",
y = "Attendances",
x = "Month") +
expand_limits(y = 0)
# Filter initial dataset
line_df <- Attends %>%
filter(org_code=="RXQ")
# Order the data frame by the date in ascending order - required for the chart to plot properly
line_df <- line_df[order(line_df$period),]
# Plotly Chart
plot_ly(line_df, x = ~period, y = ~attendances, type = 'scatter', mode = 'lines',
line = list(color = "#005EB8", width = 3, dash = 'line')) %>%
layout(title = "Type 1 attendances - Bucks Healthcare", # Set titles and axis labels
xaxis = list (title = "Month"),
yaxis = list (title ="Attendances", range = c(0, max(line_df$attendances) * 1.1))) %>% # Align the y axis scales with the ggplot chart
config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage", # Remove any unnecessary functions of plotly chart
"pan2d", "autoScale2d", "resetScale2d", "zoom2d"))
#Filter initial dataset
multiple_line_df <- Attends %>%
filter(org_code == "RXQ" | org_code=="RTH")
#Make plot
ggplot(multiple_line_df,
aes(x = period, y = attendances, colour = org_code)) +
geom_line(size = 1) +
geom_point() +
scale_colour_manual(values = c("#005EB8", "#41B6E6")) +
scale_y_continuous(labels = comma) +
labs(
title = "Type 1 attendances - Bucks Healthcare vs Royal Berkshire",
subtitle = "April 2016 to March 2019",
y = "Attendances",
x = "Month"
) +
expand_limits(y = 0) +
theme(legend.title = element_blank())
#Filter initial dataset for one trust
multiple_line_rxq <- Attends %>%
filter(org_code == "RXQ") %>%
rename(attendrxq=attendances)
#Filter initial dataset for another trust
multiple_line_rth <- Attends %>%
filter(org_code=="RTH") %>%
rename(attend_rth=attendances)
# Remove a couple of colums from both dataframes
line_rth = subset(multiple_line_rth, select = -c(admissions, org_code))
line_rxq = subset(multiple_line_rxq, select = -c(admissions, org_code))
# Combine data frames
multiple_line_data <-
merge(line_rth, line_rxq)
# Create plotly chart
plot_ly(multiple_line_data, x = ~period, y = ~multiple_line_data$attend_rth, name = 'RTH', type = 'scatter', mode = 'lines',
line = list(color = 'rgb(118, 134)', width = 3, dash = 'dot')) %>%
add_trace(y = ~multiple_line_data$attendrxq, name = 'RXQ', line = list(color = 'blue', width = 3, dash = 'solid'))%>% ##Plot a second line in NHS blue
layout(title = 'Type 1 attendances - Bucks Healthcare vs Royal Berkshire', # Set titles and axis labels
xaxis = list (title = "Time Period"),
yaxis = list (title ="Attendances", range = c(0, max(multiple_line_data$attend_rth) * 1.1))) %>% # Align the y axis scales with the ggplot chart
config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage", # Remove any unnecessary functions of plotly chart
"pan2d", "autoScale2d", "resetScale2d", "zoom2d"))
For more info see:- https://github.com/nhs-r-community/NHSRplotthedots
# Create a ggplot SPC using the NHSRplotthedots package
# Filter and format the data
sub_set <- ae_attendances %>%
filter(org_code == "RQM", type == 1, period < as.Date("2018-04-01"))
# Plot the chart
sub_set %>%
ptd_spc(value_field = breaches, date_field = period, improvement_direction = "decrease")
# Filter initial dataset
bar_df <- Attends %>%
filter(period == "2019-03-01")
# Make plot
bar <- ggplot(bar_df, aes(x = org_code, y = attendances)) +
geom_bar(stat = "identity",
position = "identity",
fill = "#005EB8") +
geom_hline(yintercept = 0,
size = 1,
colour = "#333333") +
scale_y_continuous(labels = comma) +
labs(
title = "Type 1 attendances",
subtitle = "March 2019",
y = "Attendances",
x = "Provider Code"
)
plot(bar)
Add labels
The code below adds labels to your simple bar chart.
#Filter initial dataset
bar + geom_text(aes(label = scales::comma(attendances)), vjust =2, color= "White")
# Filter the data to select just the most recent date
simple_bar_data <- Attends %>% filter(period == "2019-03-01")
#--------------------------------
# Plot the Chart
## Set bar colours
simple_bar_blue <- plot_ly(simple_bar_data, x = ~org_code, y = ~attendances, type = "bar", colors = '#005EB8') %>%
## Set titles and axis labels
layout(title = list(text = '<b>Type 1 attendances</b><br><sup>March 2019</sup>', x = 0, xanchor= 'left'),
xaxis = list(title = "Provider Code"),
yaxis = list(title = "Attendances"),
legend = list() ) %>%
## Remove any unnecessary functions of plotly chart
config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage",
"pan2d", "autoScale2d", "resetScale2d", "zoom2d"))
simple_bar_blue
Add labels
The code below adds labels to your simple bar chart.
#Plot the Chart
## Set bar colours
simple_bar_blue <- plot_ly(simple_bar_data, x = ~org_code, y = ~attendances, type = "bar", text = ~attendances, textposition = 'auto', marker = list(color = '#005EB8', line = list(colour ='#FFFFFF'), width = 1.5)) %>%
## Set titles and axis labels
layout(title = list(text = '<b>Type 1 attendances</b><br><sup>March 2019</sup>', x = 0, xanchor= 'left'),
xaxis = list(title = "Provider Code"),
yaxis = list(title = "Attendances"),
legend = list() ) %>%
## Remove any unnecessary functions of plotly chart
config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage",
"pan2d", "autoScale2d", "resetScale2d", "zoom2d"))
simple_bar_blue
# Filter initial dataset
grouped_bar_df <- Attends %>%
filter(period == "2017-03-01" | period == "2019-03-01") %>%
select(c(1:3))
# Make plot
ggplot(grouped_bar_df,
aes(
x = org_code,
y = attendances,
fill = as.factor(period)
)) +
geom_bar(stat = "identity", position = "dodge") +
geom_hline(yintercept = 0,
size = 1,
colour = "#333333") +
scale_y_continuous(labels = comma) +
#NHSRtheme::scale_fill_nhs('blues')+
labs(
title = "Attendances have increased in all providers other than Bucks Healthcare",
subtitle = "March 2017 vs March 2019",
y = "Attendances",
x = "Provider Code"
) +
theme(legend.title = element_blank())
# Filter initial dataset
grouped_bar_df_plotly <- Attends %>%
filter(period == "2017-03-01" | period == "2019-03-01") %>%
select(c(1:3))
# Change org_code to a character rather than a factor (needed for plotly x axis to work)
grouped_bar_df_plotly$org_code <- as.character(grouped_bar_df_plotly$org_code)
# Pivot so that org codes appear in different columns to allow grouped columns
grouped_bar_df_plotly <- pivot_wider(grouped_bar_df_plotly,names_from = period,values_from = attendances)
# Create chart
plot_ly(data = grouped_bar_df_plotly,
x = ~org_code,
y = ~`2017-03-01`,
marker = list(color = '#003087'),
name = '2017-03-01',
type = "bar")%>%
add_trace(data = grouped_bar_df_plotly,
x = ~org_code,
y = ~`2019-03-01`,
name = '2019-03-01',
marker = list(color = '#0072CE'))%>%
## Set titles and axis labels (note that there is no specific subtitle function in plotly so handled via annotations)
layout(title = 'Attendances have increased in all providers other than Bucks Healthcare<br><sub>March 2017 vs March 2019</sub>',
xaxis = list(title = "Provider code"),
yaxis = list(title = "Attendances"),
barmode = 'group')%>%
config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage","pan2d", "autoScale2d", "resetScale2d", "zoom2d"))
AttendsAll <- NHSRdatasets::ae_attendances %>%
filter(
org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
org_code == "RTK" | org_code == "RA2"
) %>%
filter(period == '2017-03-01')
ggplot(AttendsAll, aes(fill = type, y = attendances, x = org_code)) +
geom_bar(position = "stack", stat = "identity") +
scale_y_continuous(labels = comma) +
labs(title = "A&E attendances by department type - March 2017",
y = "Attendances",
x = "Provider Code") +
theme(legend.title = element_blank())
AttendsAll <- NHSRdatasets::ae_attendances %>%
filter(
org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
org_code == "RTK" | org_code == "RA2"
) %>%
filter(period == '2017-03-01')
#Perform any data manipulation needed
AttendsAll$org_code <- as.character(AttendsAll$org_code)
t1 <- AttendsAll %>%
filter(
org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
org_code == "RTK" | org_code == "RA2"
) %>%
filter(period == '2017-03-01')
t1<- t1 %>% pivot_wider(names_from = type, values_from = attendances)
#--------------------------------
#Plot the Chart
plot_ly(t1, x = ~org_code, y = ~`1`, type = "bar", marker = list(color = '#003087'), name = 'Type 1') %>% ##Set bar colours
add_trace(t1, x = ~org_code, y = ~`2`, marker = list(color = '#0072CE'), name = 'Type 2')%>% ## add next stack
add_trace(t1, x = ~org_code, y = ~other, marker = list(color = '#41B6E6'), name = 'Other')%>% ## add next stack
layout(title = 'A&E Attendances by department type - March 2017', ##set titles and axis labels
xaxis = list(title = "Organisation"),
yaxis = list(title = "Attendances"),
legend = list(),
barmode = 'stack') %>%
config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage", ##remove any unnecessary functions of plotly chart
"pan2d", "autoScale2d", "resetScale2d", "zoom2d"))
AttendsAll <- NHSRdatasets::ae_attendances %>%
filter(
org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
org_code == "RTK" | org_code == "RA2"
) %>%
filter(period == '2017-03-01')
ggplot(AttendsAll, aes(fill = type, y = attendances, x = org_code)) +
geom_bar(position = "fill", stat = "identity") +
scale_y_continuous(labels = percent) +
labs(title = "A&E attendances by department type - March 2017",
y = "Attendances",
x = "Provider Code") +
theme(legend.title = element_blank())
AttendsAll <- NHSRdatasets::ae_attendances %>%
filter(
org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
org_code == "RTK" | org_code == "RA2"
) %>%
filter(period == '2017-03-01') %>%
select(period,
org_code,
type,
attendances)
# Change org code to a character rather than factor data type
AttendsAll$org_code <- as.character(AttendsAll$org_code)
# Restructure data so it works for stacked bar chart
Attends_wide <-AttendsAll |>
pivot_wider(names_from = type, values_from = attendances, values_fill = 0)
#Create copy with values as percent for charts
Attends_wide_per<- Attends_wide
Attends_wide_per[, -c(1,2)] <- Attends_wide[, -c(1,2)] / rowSums(Attends_wide[, -c(1,2)])
#--------------------------------
#Plot the Chart
plot_ly(Attends_wide_per, x = ~org_code, y = ~`1`, type = "bar", marker = list(color = '#003087'), name = 'Type 1') %>% ##Set bar colours
add_trace(Attends_wide_per, x = ~org_code, y = ~`2`, marker = list(color = '#0072CE'), name = 'Type 2')%>% ## add next stack
add_trace(Attends_wide_per, x = ~org_code, y = ~other, marker = list(color = '#41B6E6'), name = 'Other')%>% ## add next stack
layout(title = 'Simple Blue Percentage Stacked Bar Chart', ##set titles and axis labels
xaxis = list(title = "Organisation Code"),
yaxis = list(title = "Attendances March 2017",
tickformat = ".0%"),
legend = list(),
barmode = 'stack') %>%
config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage", ##remove any unnecessary functions of plotly chart
"pan2d", "autoScale2d", "resetScale2d", "zoom2d"))
Attends %>%
ggplot(aes(y=admissions, x = attendances, color = org_code)) +
geom_point() +
labs(title = 'Scatter plot - admissions vs attendances',
x = 'Attendances',
y = 'Admissions') +
scale_y_continuous(labels = label_number(suffix = " K", scale = 1e-3)) +
scale_x_continuous(labels = label_number(suffix = " K", scale = 1e-3)) +
theme(legend.title=element_blank())
plot_ly(Attends, x = ~attendances, y = ~admissions, type = 'scatter', mode = 'markers', color = ~org_code,
hoverinfo = "text", text = ~ paste("Trust:", org_code, "</br></br>",
"Attends:", attendances, "</br>",
"Admits:", admissions, "</br>",
"Date:", period) ) %>%
layout(title = 'Scatter plot', ##set titles and axis labels
xaxis = list(title = "Attendances"),
yaxis = list (title = "Admissions"),
legend = list()) %>%
config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage",
"pan2d", "autoScale2d", "resetScale2d", "zoom2d")) ##remove any unnecessary functions
#For this example we are filtering on 5 organisations, type 1 activity & excluding column 3 from the dataframe.
AttendsBub <- NHSRdatasets::ae_attendances %>%
filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
org_code == "RTK"|org_code == "RA2")
# Summarise data for type 1 attendances
type_1_summary <- AttendsBub %>%
filter(type == 1) %>%
group_by(org_code, period) %>%
summarise(type_1_attendances = sum(attendances, na.rm = TRUE)) %>%
ungroup()
# Summarise data for non-type 1 attendances
type_other_summary <- AttendsBub %>%
filter(type == 3|type== 2) %>%
group_by(org_code, period) %>%
summarise(type_other_attendances = sum(attendances, na.rm = TRUE)) %>%
ungroup()
# Summarise total attendances and total admissions
total_summary <- AttendsBub %>%
group_by(org_code, period) %>%
summarise(
total_attendances = sum(attendances, na.rm = TRUE),
total_breaches = sum(breaches, na.rm = TRUE),
total_admissions = sum(admissions, na.rm = TRUE)
) %>%
ungroup()
# Merge the summaries into a single data frame
final_summary <- total_summary %>%
left_join(type_1_summary, by = c("org_code", "period")) %>%
left_join(type_other_summary, by = c("org_code", "period"))
# Replace NA values with 0 for type_1_attendances and type_3_attendances
final_summary <- final_summary %>%
mutate(
type_1_attendances = replace_na(type_1_attendances, 0),
type_other_attendances = replace_na(type_other_attendances, 0)
)
# Add percentage columns
final_summary <- final_summary %>%
mutate(
perc_admissions_attendances = (total_admissions / total_attendances) * 100,
perc_type1_attendances_total = (type_1_attendances / total_attendances) * 100,
perc_breaches_attendances = (total_breaches / total_attendances) * 100
)
# Filter initial dataset
bubble_df <- final_summary
# Calculate size for bubble chart (proportional to type 1 attendances)
bubble_df <- bubble_df %>%
mutate(size = perc_type1_attendances_total / max(perc_type1_attendances_total) * 100)
# Create bubble chart
ggplot(bubble_df, aes(x = perc_admissions_attendances, y = perc_breaches_attendances, size = size, color = size)) +
geom_point(alpha = 0.5) +
scale_size_continuous(name = "Proportion of type 1") +
#NHSRtheme::scale_fill_nhs('blues', name = "Proportion of type 1") +
labs(title = "Bubble Chart of % 4 Hour Breaches vs % converted to admission with % Attendances Type 1 Size",
x = "Conversion Rate", y = "% 4 Hour Breaches") +
theme(legend.position = "right")
#For this example we are filtering on 5 organisations, type 1 activity & excluding column 3 from the dataframe.
AttendsBub <- NHSRdatasets::ae_attendances %>%
filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
org_code == "RTK"|org_code == "RA2")
# Summarise data for type 1 attendances
type_1_summary <- AttendsBub %>%
filter(type == 1) %>%
group_by(org_code, period) %>%
summarise(type_1_attendances = sum(attendances, na.rm = TRUE)) %>%
ungroup()
# Summarise data for non-type 1 attendances
type_other_summary <- AttendsBub %>%
filter(type == 3|type== 2) %>%
group_by(org_code, period) %>%
summarise(type_other_attendances = sum(attendances, na.rm = TRUE)) %>%
ungroup()
# Summarise total attendances and total admissions
total_summary <- AttendsBub %>%
group_by(org_code, period) %>%
summarise(
total_attendances = sum(attendances, na.rm = TRUE),
total_breaches = sum(breaches, na.rm = TRUE),
total_admissions = sum(admissions, na.rm = TRUE)
) %>%
ungroup()
# Merge the summaries into a single data frame
final_summary <- total_summary %>%
left_join(type_1_summary, by = c("org_code", "period")) %>%
left_join(type_other_summary, by = c("org_code", "period"))
# Replace NA values with 0 for type_1_attendances and type_3_attendances
final_summary <- final_summary %>%
mutate(
type_1_attendances = replace_na(type_1_attendances, 0),
type_other_attendances = replace_na(type_other_attendances, 0)
)
# Add percentage columns
final_summary <- final_summary %>%
mutate(
perc_admissions_attendances = (total_admissions / total_attendances) * 100,
perc_type1_attendances_total = (type_1_attendances / total_attendances) * 100,
perc_breaches_attendances = (total_breaches / total_attendances) * 100
)
# Filter initial dataset
bubble_df <- final_summary
# Calculate size for bubble chart (proportional to type 1 attendances)
bubble_df <- bubble_df %>%
mutate(size = perc_type1_attendances_total / max(perc_type1_attendances_total) * 100)
# Create bubble chart
plot_ly(bubble_df, x = ~perc_admissions_attendances, y = ~perc_breaches_attendances, text = "Proportion of type 1", type = 'scatter', mode = 'markers', color = ~size, colors = 'Blues', size = ~size, sizes = c(5,20),
marker = list(sizemode = 'diameter', opacity = 0.7)) %>%
layout(title = 'Bubble Chart of % 4 Hour Breaches vs \n % converted to admission with % Attendances Type 1 Size',
xaxis = list(showgrid = FALSE, title = "Conversion Rate"),
yaxis = list(showgrid = FALSE, title = "% 4 Hour Breaches"))
# Plot the population pyramid
ggplot(population_data_filtered, aes(x = Age, y = Value, fill = Sex)) +
geom_bar(stat = "identity", position = "identity") +
coord_flip() +
scale_y_continuous(labels = function(x) comma(abs(x))) +
labs(title = "Population Age Profile by Gender",
x = "Age Group",
y = "Population Count",
fill = "Gender") +
NHSRtheme::scale_fill_nhs("blues")
population_data_filtered %>%
# mutate(population = ifelse(test = gender == "M", yes = -population, no = population)) %>%
mutate(abs_pop = abs(Value)) %>%
plot_ly(x= ~Value, y=~Age, color=~Sex) %>%
add_bars(orientation = 'h', hoverinfo = 'text', text = ~abs_pop) %>%
layout(bargap = 0.1, barmode = 'overlay',
xaxis = list(tickmode = 'array', tickvals = c(-2000000, -1000000, 0, 1000000, 2000000),
ticktext = c('2M', '1M', '0', '1M', '2M')))
Further information on ggradar can be found via the GitHub Repository
# Create Example dataset
data <- tibble::tribble(
~group, ~Trauma_and_Orthopaedics, ~General_Surgery, ~Gynaecology, ~Ophthalmology, ~Dermatology,
"Trust 1", 0.8, 0.9, 0.7, 0.6, 0.9,
"Trust 2", 0.6, 0.7, 0.8, 0.7, 0.6,
"Trust 3", 0.7, 0.8, 0.9, 0.8, 0.7
)
# Create the radar chart
ggradar(data,
values.radar = c("0%", "50%", "100%"), #Sets labels for min, min & max grid lines
grid.min = 0, grid.mid = 0.5, grid.max = 1, #Defines the values for the grid lines
group.line.width = 2, #Changes the line width of plotted line
group.point.size = 3, #Changes the point size of plotted line
group.colours = c("#00AFBB", "#E7B800", "#FC4E07"), #Colour for plotted lines based on group
background.circle.colour = "white", #Plot background colour
gridline.mid.colour = "grey", #Colour of gridlines
legend.position = "bottom") #Position can be
# Create a radial column chart
ggplot(Attends, aes(x = reorder(org_code, -attendances), y = attendances)) +
geom_col(width = 0.5, fill = "skyblue") +
coord_polar(start = 0) +
#NHSRtheme::scale_fill_nhs("blues") +
scale_y_continuous(labels = function(x) comma(abs(x))) +
labs(title = "Attendances per Trust",
x = NULL, y = NULL)
data <- Attends %>% group_by(org_code) %>%
summarise(sum_attends = sum(attendances))
data$angle = c(0, 72, 144, 216, 288)
plot_ly(
type = 'barpolar',
r = data$sum_attends,
theta = data$angle
) %>%
layout(
title = 'Attendances per Trust',
polar = list(
radialaxis = list(
visible = TRUE,
range = c(0, 500000)
),
angularaxis = list(
tickmode = 'array',
tickvals = data$angle,
ticktext = data$org_code,
ticklen = 15 # This indirectly adjusts spacing by lengthening tick marks
),
bargap = 0.5, # This adjusts the gap between bars
bargroupgap = 0.5 # This adjusts the gap between bar groups
)
)
# Summarize the data
summary_data <- Attends %>%
group_by(org_code) %>%
summarise(min_attendance = min(attendances),
max_attendance = max(attendances))
# Create the horizontal bar range chart
ggplot(summary_data, aes(y = org_code)) +
geom_linerange(aes(xmin = min_attendance, xmax = max_attendance), color = "blue", size = 1.5) +
labs(title = "Range of Type 1 Attendances by Trust between 2019 and 2023",
x = "Number of Attendances",
y = "Organisation Code") +
NHSRtheme::scale_fill_nhs("blues")
# Summarize the data
summary_data_pl <- Attends %>%
group_by(org_code) %>%
summarise(min_attendance = min(attendances),
max_attendance = max(attendances))
# Create the horizontal bar range chart
plot_ly(summary_data_pl, color=I("blue")) %>%
add_segments(x = ~min_attendance, xend = ~max_attendance, y = ~org_code, yend = ~org_code) %>%
layout(
title = "Range of Type 1 Attendances by Trust between 2019 and 2023",
xaxis = list(title = "Number of Attendances"),
yaxis = list(title = "Organisation Code"), #, categoryorder = "category descending"),
margin = list(l = 65)
) %>%
config(modeBarButtonsToRemove = c(
"zoomIn2d", "zoomOut2d", "select2d", "lasso2d", # "toImage",
"pan2d", "autoScale2d", "zoom2d"
)) # ,"resetScale2d", "hoverClosestCartesian"))
# Extract the data
AttendsSAC <- NHSRdatasets::ae_attendances %>%
filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
org_code == "RTK"|org_code == "RA2")
# Plot the stacked area chart
ggplot(AttendsSAC, aes(x = period, fill = org_code)) +
geom_area(stat = "count") +
NHSRtheme::scale_fill_nhs("blues")
# Set the colours for the chart
blues <- c("#00A9CE", "#41B6E6", "#0072CE", "#005EB8","#003087" )
# Extract the data
AttendsSAC <- NHSRdatasets::ae_attendances %>%
filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
org_code == "RTK"|org_code == "RA2") |>
# sort by unique code in reverse so the chart is stacked in opposite alphabet order
mutate(org_code = factor(org_code, levels = rev(sort(unique(org_code)))))
# Plot the stacked area chart
plot_ly(data = AttendsSAC |> group_by(period, org_code) |> summarise(count = n()) |> ungroup(),
x = ~period,
y = ~count,
color = ~org_code,
colors = blues,
type = 'scatter',
mode = 'lines',
stackgroup = 'one',
line = list(colors = blues),
stackgroup = 'one') |>
# solid fill colours to replicate the ggplot style
style(traces = 1, fillcolor = "#00A9CE") |>
style(traces = 2, fillcolor = "#41B6E6") |>
style(traces = 3, fillcolor = "#0072CE") |>
style(traces = 4, fillcolor = "#005EB8") |>
style(traces = 5, fillcolor = "#003087") |>
layout(legend = list(title = list(text = "org_code")),
xaxis = list(tickformat = "%Y",
dtick = "M12"))
Use facet_wrap() to create multiple charts split by
subgroup in data. You can use ncol = or nrow
to specify number of rows or columns. For example,
facet_wrap(~org_code, nrow=1) to put all charts in a single
row.
ggplot(Attends, aes(x = period, y = attendances)) +
geom_line(colour = "#005EB8", size = 1) +
facet_wrap(~org_code)+
labs(title="Type 1 attendances",
subtitle = "April 2016 to March 2019") +
expand_limits(y = 0)
g <- ggplot(Attends, aes(org_code, attendances))
g + geom_boxplot(varwidth=T, fill="light blue") +
labs(title="A&E Attendances",
subtitle="Distribution by Trust",
caption="Source: A&E Monthly Stats",
x="Trust",
y="A&E attendances")
#Create a sample dataset - replace with your own
name = c("Trust A","Trust B","Trust C","Trust D","Trust E","Trust F","Trust G","Trust H","Trust I","Trust J")
count= c(89,85,76,64,50,45,29,20,10,5)
#Create a dataframe using sample data. Convert name to factor.
data = data.frame(name, count, stringsAsFactors = TRUE)
#Sets the coloured background
bullet_base <- data.frame(rank = c("Poor", "Ok", "Good", "Excellent"),
value = c(25, 25, 25, 25))
bullet_base_rep <-
do.call("rbind", replicate(nrow(data), bullet_base, simplify = FALSE)) %>%
mutate(name = sort(rep(data$name, 4) ))
#Colour the background bars
bullet_colors <- c("#e44727", "#e4a727", "#61AB40", "#318100")
names(bullet_colors) <- c("Poor", "Ok", "Good", "Excellent")
#Create plot
ggplot() +
geom_bar(data = bullet_base_rep,
aes(x = name, y = value, fill = rank), stat = "identity",
position = "stack") +
geom_bar(data = data,
aes(x = name, y = count), fill = "black", width = .2,
stat = "identity") +
scale_fill_manual(values = bullet_colors) +
coord_flip(expand = FALSE)
For further information, see the FunnelPlotR GitHub Repository
MyAttends <- NHSRdatasets::ae_attendances %>%
filter(period == '2017-03-01') %>%
filter(type ==1) # %>%
# select(-c(3,5))
MyAttends$org_code <- as.character(MyAttends$org_code)
funnel_plot(.data = MyAttends,
numerator= breaches, #Specify the numerator
denominator=attendances, #Specify the denominator
group = org_code, #Specify that we want to plot Trust Names
title = "A&E breaches", #Specify the chart title
draw_adjusted = TRUE, #Specify that we want to adjust the control limits to account for over-dispersion
sr_method = "SHMI", #Specify to adjust for over-dispersion using the CQC Methodology (can also use SHMI)
# label = "highlight", #Specify that we want to use the 'highlight' argument to show outliers
# highlight=HighLight, #Get the highlight argument to reference the list of outlier NEY trusts
data_type="PR", #Specify the indicator is a proportion
limit=95, #Specify to show both 95 and 99.8% control limits
multiplier = 100,
y_label = "% breaches", #Specify the X Axis Label
x_label = "No. of attendances") #Specify the Y Axis Label
## A funnel plot object with 139 points of which 13 are outliers.
## Plot is adjusted for overdispersion.
# Load the dataset
df <- read.csv("https://cdn.rawgit.com/plotly/datasets/master/GanttChart-updated.csv",
stringsAsFactors = FALSE)
# Define the start date of each task & convert to date
df$Start <- as.Date(df$Start, format = "%m/%d/%Y")
# Define the end of the task by adding the duration to the start date
df$End <- df$Start + df$Duration
# Prepare the data for plotting
gantt_data <- df %>%
mutate(task_id = row_number()) %>%
select(task_id, Task, Start, End, Resource)
# Create the Gantt chart using ggplot2
gantt_chart <- ggplot(gantt_data, aes(x = Start, xend = End, y = reorder(Task, -task_id), yend = reorder(Task, -task_id), color = factor(Resource))) +
geom_segment(size = 5) +
labs(title = "Gantt Chart with NHS Blue Shades",
x = "Date",
y = "Task") +
NHSRtheme::theme_nhs() +
scale_color_manual(values = colorRampPalette(c("#005EB8", "#73C2FB"))(length(unique(gantt_data$Resource))))
# Print the Gantt chart
print(gantt_chart)
#Violin Plot
ggplot(Attends, aes(org_code, attendances)) + geom_violin() +
labs(title="A&E Attendances", subtitle="Range by Trust", caption="Source: A&E Monthly Statistics", x="Trust", y="Attendances") + scale_fill_brewer(palette="Blues") + theme_classic()
The ggtext package can be used to add colour to titles
or subtitles. You need to ensure that you use it with
theme(plot.subtitle = element_markdown(hjust = 0, size = 12))
otherwise it will not work.
#Prepare data
dumbbell_df <- NHSRdatasets::ae_attendances %>%
filter(type ==1) %>%
select(-c(3,6)) %>%
filter(period == "2017-03-01" | period =="2019-03-01") %>%
mutate(period =as.numeric(format(period,'%Y'))) %>%
mutate(period = as.character(period)) %>%
mutate(performance = 1- (breaches/attendances)) %>%
select(c(1:2,5)) %>%
spread(period, performance) %>%
mutate(gap = `2019` - `2017`) %>%
arrange(desc(gap)) %>%
head(10)
#Make plot
dumbell <- ggplot(dumbbell_df, aes(x = `2017`, xend = `2019`, y = reorder(org_code, gap), group = org_code)) +
geom_dumbbell(colour = "#dddddd",
size = 3,
colour_x = "#41B6E6",
colour_xend = "#005EB8") +
scale_x_continuous(labels = scales::percent_format(accuracy=1))+
geom_vline(xintercept = 0.95, size = 1, colour="#333333", linetype = "dashed") +
labs(title = "Performance improved for all providers",
subtitle = "<span style='color: #41B6E6;'>March 2017 <span><span style='color: black;'> vs <span><span style='color: #005EB8;'>March 2019<span>") +
xlab("4 hour performance") +
ylab("Org code") +
theme(plot.subtitle = element_markdown(hjust = 0, size = 12))+ theme(legend.position = "none")
plot(dumbell)
Adding annotations
You can use geom_label to add annotations to existing
plots or you can add line in when creating ggplot.
dumbell + geom_label(aes(x = 0.9, y = "R1K",label = "Standard"),
hjust = -0.5,
vjust = -0.1,
colour = "#555555",
label.size = NA,
family="Arial",
size = 4)
# Initial data wrangling
dumbbellplotly_df <- dumbbell_df
dumbbellplotly_df$org_code <- as.character(dumbbellplotly_df$org_code)
dumbbellplotly_df$`2017`<- round(dumbbellplotly_df$`2017`*100,1)
dumbbellplotly_df$`2019`<- round(dumbbellplotly_df$`2019`*100,1)
dumbbellplotly_df$`gap`<- round(dumbbellplotly_df$`gap`*100,1)
# Region chart This sets up a blank plotly object to begin with
dumbellplotly<- plot_ly(dumbbellplotly_df, color = I("gray80"))
# It then adds a line for each 'org_code that starts at 2017 and goes through to the 2019 number
dumbellplotly <-dumbellplotly %>% add_segments(x = ~dumbbellplotly_df$`2017`, xend = ~dumbbellplotly_df$`2019`, y = ~org_code, yend = ~org_code, showlegend = FALSE)
# It then adds a dot on the 2017 end of the line in red
dumbellplotly <- dumbellplotly %>% add_markers(x = ~dumbbellplotly_df$`2017`, y = ~org_code, name = "2017", color = I("#AE2573"),size = 8 )
# It then adds a dot on the 2019 end of the line in blue
dumbellplotly <- dumbellplotly %>% add_markers(x = ~dumbbellplotly_df$`2019`,y = ~org_code, name = "2019", color = I("#0072CE"),size = 8 )
#The titles are then added
dumbellplotly<- dumbellplotly %>% layout( title = "Performance improved for all providers",
xaxis = list(title = "4 hour performance", ticksuffix="%"), margin = list(l = 65),
shapes = list(list(type = "line",
x0 = 95,
x1 = 95,
y0 = 0,
y1 = "RXC",
ref = "x",
yref = "y",
line = list(color="#333333",dash = "dash"))),
annotations = list(
list(
x = 95,
y = "RXC",
xref = "x",
yref = "y",
text = "Standard (95%)",
showarrow = TRUE,
arrowhead = 2,
# ax = 20,
# ay = -30,
font = list(color = "555555#")
)))
dumbellplotly
Further information on the sankeyNetwork function can be found on the Rdocumentation website
Sankey_data<- NHSRdatasets::LOS_model %>%
group_by(Organisation,Age) %>%
summarise(Attends= sum(LOS))
Sankey_data<- filter ( Sankey_data, Age >90) %>%
arrange(Organisation, Age)
##The first step is to designate nodes this just writes a list of all the Different 'Source' and 'Target' Nodes
nodes <- data.frame(
name=c(as.character(Sankey_data$Organisation),
as.character(Sankey_data$Age)) %>% unique())
# With networkD3, you need to provide the details of the connection between 'nodes' these must be provided using an 'id'
##, not using real name like in the links dataframe. So we need to reformat it.
Sankey_data$IDsource <- match(Sankey_data$Organisation, nodes$name)-1
Sankey_data$IDtarget <- match(Sankey_data$Age, nodes$name)-1
Sankey_data<-Sankey_data %>% arrange(IDsource,IDtarget)
##Make the sankey network , Iterations=0 means that the the data is displayed according to how it is sorted in the initial df, starting with the source node
sankey <- networkD3::sankeyNetwork(Links = as.data.frame.matrix(Sankey_data), Nodes = nodes,
Source = "IDsource", Target = "IDtarget",
Value = "Attends", NodeID = "name", sinksRight = TRUE, fontSize = 18,iterations =0)
sankey <- htmlwidgets::prependContent(sankey, htmltools::tags$h4("Attends by Age"))
sankey